/*
********************************************************************************
Description:		Dynamic effects after reweighting

Uses:				"$mergedata\CouldhaveSample.dta"

Saves:				"${results}\ReweightedRegressions_byIncSource_orig.csv"
					"${results}\ReweightedRegressions_byIncSource_weighted.csv"
					"${results}\ReweightedRegressions_byIncSource_norm_weighted.csv"

********************************************************************************
*/



***************************************
*FIGURE A2: Dynamics after reweighting*
***************************************
* Regressions by income source, reweighting to make sample in each reg look same on observables as in the self employment case

* Load the merged sample; this replaces any data currently in memory.
use "$mergedata\CouldhaveSample.dta", clear

* Create age bands.
*   - ages outside 16-120 (and any extended missing codes) are recoded to system missing
*   - the upper threshold is 65 - 5*female, i.e. 60 when female == 1 and 65 when female == 0
*     (this is what the value labels call "SPA")
*   - band 0 flags an unknown age; a known age of 40+ with missing female leaves ageband missing
quietly replace age = . if !inrange(age, 16, 120)
local spa "(65 - 5*female)"
generate byte ageband = 1 if age < 40
quietly {
	replace ageband = 2 if age >= 40 & age < `spa' & !missing(female)
	replace ageband = 3 if age >= `spa' & !missing(age) & !missing(female)
	replace ageband = 0 if age == .
}
label define ageband 0 "unknown" 1 "Under 40" 2 "Over 40, under SPA" 3 "Over SPA"
label values ageband ageband

* Build the cells used for reweighting.
* yrsfiling and maxyrssince are each cut into quartiles (<var>_quant), then a cell id
* (group_var) is assigned for every combination of female x ageband x the two quartile
* variables; group_count holds the number of observations in each cell.
foreach binvar in yrsfiling maxyrssince {
	gquantiles `binvar'_quant = `binvar', xtile nquantiles(4)
}
local cellvars "+ female + ageband + yrsfiling_quant + maxyrssince_quant"
gegen int group_var = group(`cellvars'), counts(group_count) replace

* Unweighted regressions by income source, plus per-cell observation counts.
* For each income source the loop:
*   1. runs directctrlreg quietly and stores its estimation sample in touse_<source>;
*   2. re-runs the same specification, passing the open file handle through fh()
*      (directctrlreg is defined elsewhere; presumably it writes its results there);
*   3. stores in <source>_count the number of estimation-sample observations in each
*      female x ageband x yrsfiling_quant x maxyrssince_quant cell.
* A handle left open by an earlier aborted run would make -file open- fail, so close it first.
capture file close resultsfh
file open resultsfh using "${results}\ReweightedRegressions_byIncSource_orig.csv", write text replace
foreach incvar in empinc sempinc propinc pensinc divinc {
	qui directctrlreg, outcome(`incvar'_cpi) ctrlvars(survives) trimlevel(1)
	qui gen byte touse_`incvar'=e(sample)

	directctrlreg, outcome(`incvar'_cpi) ctrlvars(survives) trimlevel(1) fh(resultsfh) header printtoscreen

	* The group id itself is not needed again (hence a tempvar); only the counts() by-product is kept.
	* Rows outside touse_<source> are excluded from the count.
	tempvar a 
	qui gegen int `a' = group(+ female + ageband + yrsfiling_quant + maxyrssince_quant) if touse_`incvar', counts(`incvar'_count) replace
}
file close resultsfh 


* Reweighted regressions by income source.
* Each source's estimation sample is reweighted so that its distribution across the
* female x ageband x quartile cells matches the self-employment (sempinc) sample:
*   weight = sempinc_count / <source>_count.
* sempinc is included as a check: its weight is 1, so its results should not change.
* NOTE(review): sempinc_count appears to be filled only on rows in sempinc's own estimation
* sample; rows in another source's sample but outside sempinc's would then get a missing
* weight and drop out of the regression -- confirm this is intended.
* A handle left open by an earlier aborted run would make -file open- fail, so close it first.
capture file close resultsfh
file open resultsfh using "${results}\ReweightedRegressions_byIncSource_weighted.csv", write text replace
foreach incvar in empinc sempinc propinc pensinc divinc {
	qui gen float `incvar'_weight = sempinc_count/`incvar'_count
	
	directctrlreg if touse_`incvar' [aweight=`incvar'_weight], outcome(`incvar'_cpi) ctrlvars(survives) trimlevel(1) fh(resultsfh) header printtoscreen
}
file close resultsfh 

* Normalise each income source to 1 in the audit year, person by person.
* The reference value is the individual's CPI-adjusted income in the row where
* yrssince20==20 (presumably the audit year -- confirm the coding of yrssince20),
* spread to all of that person's rows via max() within utr_no.
* The numerator must be the same CPI-adjusted series as the denominator; otherwise the
* ratio is not 1 in the reference year.
* <source>_norm is missing when the reference value is zero or missing.
foreach incvar in it_cl4_cgt empinc sempinc propinc pensinc divinc {
	tempvar refval refval_all
	gen double `refval' = `incvar'_cpi if yrssince20==20
	gegen double `refval_all' = max(`refval'), by(utr_no)
	gen double `incvar'_norm = `incvar'_cpi/`refval_all'  //normalise to 1 at individual level based on audit year
	drop `refval' `refval_all'  // free the scratch variables before the next income source
}

* Reweighted regressions on the normalised outcomes (<source>_norm), using the same
* cell weights as the reweighted regressions on the CPI-adjusted outcomes:
*   weight = sempinc_count / <source>_count.
* The weight variables may already exist from the earlier weighted run, in which case a
* plain -generate- would abort with r(110); drop any existing copy before rebuilding it.
* A handle left open by an earlier aborted run would make -file open- fail, so close it first.
capture file close resultsfh
file open resultsfh using "${results}\ReweightedRegressions_byIncSource_norm_weighted.csv", write text replace
foreach incvar in empinc sempinc propinc pensinc divinc {
	capture drop `incvar'_weight
	qui gen float `incvar'_weight = sempinc_count/`incvar'_count
	
	directctrlreg if touse_`incvar' [aweight=`incvar'_weight], outcome(`incvar'_norm) ctrlvars(survives) trimlevel(1) fh(resultsfh) header printtoscreen	
}
file close resultsfh 

